import { OpenAI } from "openai";
import { getLogger, waitFor } from "../../utils";
import getConfig from "../../config";
import { insertRecords, query } from "../../db/clickhouse";
import { chunk } from "lodash";
import { Task } from "..";

const task: Task = {
  cron: '30 * * * *',
  singleInstance: true,
  callback: async () => {
    /** Pull request data handed to the model. */
    interface InputPullRequest {
      /** Pull request id (`pull_diff.id`, matched against `events.issue_id`). */
      id: number;
      /** Hosting platform the pull request belongs to. */
      platform: string;
      /** Repository name taken from the pull request's events. */
      repoName: string;
      /** Pull request number taken from the pull request's events. */
      number: number;
      /** Most recent title found in the pull request's events. */
      title: string;
      /** Most recent body found in the pull request's events. */
      body: string;
      /** Leading part of the stored diff (the query keeps the first 10000 characters). */
      diff: string;
    }

    /** Analysis result for one pull request, as stored in `pull_info`. */
    interface OutputPullRequest {
      id: number;
      platform: string;
      /** Main programming language named by the model. */
      primaryLanguage: string;
      /** One of `qualityOptions`. */
      codeQuality: string;
      /** One of `qualityOptions`. */
      titleDescQuality: string;
      /** PR type named by the model (the prompt offers Feature/Refactor/Docs/Fix/Chore/Other). */
      prType: string;
      /** 1 to 5; the prompt defines 1 as the highest value and 5 as the lowest. */
      valueLevel: number;
      /** 'Yes', 'Uncertain' or 'No'. */
      isAutomaticallyGenerated: string;
      /** 'Yes' or 'No'. */
      hostileOrAbuse: string;
    }

    /** Labels accepted for both quality ratings. */
    const qualityOptions = ['Very Poor', 'Poor', 'Fair', 'Good', 'Excellent'];

    /** Number of pull requests whose analyses are started together in one chunk. */
    const concurrentRequestNumber = 10;

    const logger = getLogger('PullRequestAnalysisTask');
    const config: any = await getConfig();

    // OpenAI SDK client pointed at the DashScope compatible-mode endpoint, authenticated with the Qwen token.
    const openai = new OpenAI({
      baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
      apiKey: config.qwen.token,
    });

    /**
     * Makes sure the `pull_info` result table exists.
     *
     * The statement is `CREATE TABLE IF NOT EXISTS`, so issuing it on every run is harmless.
     * The table is a ReplacingMergeTree ordered by `(id, platform)`; the Enum columns list the
     * same labels the analysis prompt asks the model to choose from.
     */
    const createPullInfoTable = async () => {
      const createTableStatement = `
    CREATE TABLE IF NOT EXISTS pull_info
    (
      \`id\` UInt64,
      \`platform\` LowCardinality(String),
      \`code_quality\` Enum('Excellent' = 1, 'Good' = 2, 'Fair' = 3, 'Poor' = 4, 'Very Poor' = 5),
      \`pr_title_and_description_quality\` Enum('Excellent' = 1, 'Good' = 2, 'Fair' = 3, 'Poor' = 4, 'Very Poor' = 5),
      \`pr_type\` LowCardinality(String),
      \`value_level\` UInt8,
      \`primary_language\` LowCardinality(String),
      \`is_automatically_generated\` Enum('Yes' = 1, 'Uncertain' = 2, 'No' = 3),
      \`hostile_or_abuse\` Enum('No' = 1, 'Yes' = 2)
    )
    ENGINE = ReplacingMergeTree
    ORDER BY (id, platform)
    SETTINGS index_granularity = 8192`;

      await query(createTableStatement);
    };

    /**
     * Asks the chat model to grade one pull request and parses its line-oriented answer.
     *
     * The prompt requests one `Label: value` line per metric. Every metric is required: a reply
     * that lacks a line, or carries a value outside the allowed set, is rejected as a whole so
     * that no half-filled record reaches the database.
     *
     * @param pullRequest Pull request whose title, body and diff are embedded in the prompt.
     * @returns The parsed analysis; a fixed fallback record (lowest ratings, hostile = 'Yes') when
     *          the request fails with an "inappropriate content" error; or `null` when the request
     *          fails for another reason or the reply cannot be parsed (the error is logged).
     */
    const analyzePullRequest = async (pullRequest: InputPullRequest): Promise<OutputPullRequest | null> => {
      const prompt = `
You are an advanced code review assistant responsible for conducting a detailed analysis of a GitHub Pull Request (PR).
Please analyze the provided PR data and return the results based on the following framework.
Only return the results, do not return any other text.

# Analysis Framework:

## Submission Quality Analysis

- Code Quality: [Excellent/Good/Fair/Poor/Very Poor]
(Evaluate code style, naming conventions, code quantity and code complexity etc. If there is no code in the PR, return Very Poor. If the code is not good, return Poor.)

- PR Title and Description Quality: [Excellent/Good/Fair/Poor/Very Poor]
(Evaluate title conciseness, description detail, and adherence to project standards. If there is no title or description in the PR, return Very Poor.)

## PR Type Classification

- PR Type: [Feature/Refactor/Docs/Fix/Chore/Other]
(Classify the PR based on its content and purpose.)

## PR Value Assessment

- Value Level: [1/2/3/4/5]
(Assess the PR's overall value based on description, code quality, code quantity, and impact. 1 = Highest, 5 = Lowest.)

## Primary Programming Language

- Primary Language: [Python/Java/JavaScript/...]
(Identify the main programming language used in the PR.)

## PR Automatically Generated Assessment

- Is Automatically Generated: [Yes/No/Uncertain]
(Determine if the PR is likely generated by an automated tool based on patterns in the title, description, commit log, or code changes.)

## Hostile or Abuse Assessment

- Hostile or Abuse: [Yes/No]
(Determine if the PR is hostile or abusive based on the title, description, code changes, and commit log. If the PR is hostile or abusive, return Yes. Otherwise, return No. PLEASE NOTICE THAT simply show mood or emotions are not supposed to be treated as hostile or abusvie, ONLY strong hostile words or advertisement not related to the code base are included.)

## Return the detailed analysis results in the following format:

Code Quality: [Excellent/Good/Fair/Poor/Very Poor]
PR Title and Description Quality: [Excellent/Good/Fair/Poor/Very Poor]
PR Type: [Feature/Refactor/Docs/Fix/Chore/Other]
Value Level: [1/2/3/4/5]
Primary Language: [Python/Java/JavaScript/Unknown...]
Is Automatically Generated: [Yes/No/Uncertain]
Hostile or Abuse: [Yes/No]

# Example Output:

Code Quality: Good
PR Title and Description Quality: Good
PR Type: Feature
Value Level: 3
Primary Language: Python
Is Automatically Generated: Uncertain
Hostile or Abuse: No

# PR Data:

Title: ${pullRequest.title}
Description: ${pullRequest.body}
Git Diff: ${pullRequest.diff}
    `;

      /**
       * Reads the text that follows `<label>:` in the model reply (case-insensitive, rest of the line).
       * Throws when the label is absent or empty, or when `allowed` is given and the value is not in it.
       */
      const extractField = (label: string, reply: string, allowed?: string[]): string => {
        const match = reply.match(new RegExp(`${label}:\\s*([^\\n]+)`, 'i'));
        const value = match ? match[1].trim() : '';
        if (!value) {
          throw new Error(`Missing value for "${label}"`);
        }
        if (allowed && !allowed.includes(value)) {
          throw new Error(`Invalid value: ${value}`);
        }
        return value;
      };

      try {
        const response = await openai.chat.completions.create({
          model: 'qwen3-32b',
          // Provider-specific option; the `as any` cast below presumably exists because the
          // SDK request type does not declare it.
          enable_thinking: false,
          messages: [{ role: 'user', content: prompt }],
        } as any);

        // The message content may be null, so check it instead of asserting it away.
        const firstChoice = response.choices[0];
        const resultStr = firstChoice ? firstChoice.message.content : null;
        if (typeof resultStr !== 'string') {
          throw new Error('Model reply contains no text content');
        }

        const analysis: OutputPullRequest = {
          id: pullRequest.id,
          platform: pullRequest.platform,
          codeQuality: extractField('Code Quality', resultStr, qualityOptions),
          titleDescQuality: extractField('PR Title and Description Quality', resultStr, qualityOptions),
          prType: extractField('PR Type', resultStr),
          valueLevel: parseInt(extractField('Value Level', resultStr, ['1', '2', '3', '4', '5']), 10),
          primaryLanguage: extractField('Primary Language', resultStr),
          isAutomaticallyGenerated: extractField('Is Automatically Generated', resultStr, ['Yes', 'Uncertain', 'No']),
          hostileOrAbuse: extractField('Hostile or Abuse', resultStr, ['Yes', 'No']),
        };
        return analysis;
      } catch (e) {
        if (e instanceof Error && e.message.includes('Input data may contain inappropriate content.')) {
          // The request was refused for its content: record the pull request with the lowest
          // ratings and flag it as hostile instead of leaving it unanalyzed.
          const rejected: OutputPullRequest = {
            id: pullRequest.id,
            platform: pullRequest.platform,
            primaryLanguage: 'Unknown',
            codeQuality: 'Very Poor',
            titleDescQuality: 'Very Poor',
            prType: 'Other',
            valueLevel: 5,
            isAutomaticallyGenerated: 'Uncertain',
            hostileOrAbuse: 'Yes',
          };
          return rejected;
        }
        logger.error(`Error analyzing pull request ${pullRequest.id}: ${e}`);
        return null;
      }
    };

    /**
     * Loads up to `num` pull requests that have a stored diff with status 'normal' but no row
     * in `pull_info` yet, together with their repository, number, title and body from `events`.
     *
     * Pull requests listed in `pulls_with_label` are served first; only when none of them is
     * pending does the query fall back to any pending pull request. Pull requests for which no
     * `PullRequestEvent` row is found are left out of the result.
     *
     * @param num Maximum number of diffs to select.
     * @returns The pull requests ready for analysis, in the order the diffs were returned.
     */
    const getPullRequests = async (num: number): Promise<InputPullRequest[]> => {
      // The two candidate queries differ only by the optional label filter.
      const labelFilter = 'AND (platform, id) IN (SELECT platform, id FROM pulls_with_label)';
      const pendingDiffQuery = (onlyLabelled: boolean) => [
        'SELECT id, platform, substring(diff, 1, 10000)',
        "FROM pull_diff WHERE status = 'normal' AND (platform, id) NOT IN (SELECT platform, id FROM pull_info)",
        ...(onlyLabelled ? [labelFilter] : []),
        `LIMIT ${num}`,
      ].join('\n    ');

      // try to get pull requests from label data first
      let diffs = await query(pendingDiffQuery(true));
      if (diffs.length === 0) {
        diffs = await query(pendingDiffQuery(false));
        if (diffs.length === 0) {
          return [];
        }
      }

      // Rows are positional, in SELECT order.
      const diffRows = diffs.map(item => ({ id: +item[0], platform: item[1], diff: item[2] }));

      const keyList = diffRows.map(item => `('${item.platform}', ${item.id})`).join(',');
      const pullInfo = await query([
        'SELECT issue_id, platform, any(repo_name), any(issue_number), argMax(issue_title, created_at), argMax(body, created_at)',
        `FROM events WHERE type = 'PullRequestEvent' AND (platform, issue_id) IN (${keyList})`,
        'GROUP BY issue_id, platform',
      ].join('\n    '));

      // Index the event rows once so that matching diffs does not rescan the whole list per diff.
      const pullKey = (platform: string, id: number) => `${platform}/${id}`;
      const infoByKey = new Map<string, Pick<InputPullRequest, 'repoName' | 'number' | 'title' | 'body'>>();
      for (const item of pullInfo) {
        const key = pullKey(item[1], +item[0]);
        if (!infoByKey.has(key)) {
          infoByKey.set(key, { repoName: item[2], number: item[3], title: item[4], body: item[5] });
        }
      }

      const ret: InputPullRequest[] = [];
      for (const item of diffRows) {
        const info = infoByKey.get(pullKey(item.platform, item.id));
        if (!info) {
          // No event data for this diff: nothing to build a prompt from.
          continue;
        }
        ret.push({
          id: item.id,
          platform: item.platform,
          repoName: info.repoName,
          number: info.number,
          diff: item.diff,
          title: info.title,
          body: info.body,
        });
      }
      return ret;
    };

    /**
     * Persists the successful analyses of one chunk into `pull_info`.
     *
     * @param pullRequests Analysis results; `null` entries (failed analyses) are skipped.
     *                     When nothing is left to store, no insert is issued at all.
     */
    const savePullRequests = async (pullRequests: Array<OutputPullRequest | null>) => {
      const rows = pullRequests
        .filter((p): p is OutputPullRequest => p !== null)
        // Map the camelCase result fields onto the snake_case column names of `pull_info`.
        .map(p => ({
          id: p.id,
          platform: p.platform,
          code_quality: p.codeQuality,
          pr_title_and_description_quality: p.titleDescQuality,
          pr_type: p.prType,
          value_level: p.valueLevel,
          primary_language: p.primaryLanguage,
          is_automatically_generated: p.isAutomaticallyGenerated,
          hostile_or_abuse: p.hostileOrAbuse,
        }));
      if (rows.length === 0) {
        return;
      }
      await insertRecords(rows, 'pull_info');
    };

    await createPullInfoTable();

    // One batch holds 60 chunks of `concurrentRequestNumber` pull requests.
    const batchSize = concurrentRequestNumber * 60;

    /**
     * Analyzes one chunk concurrently and stores the results.
     * Never rejects: a failure while saving is logged and reported as zero stored records,
     * so a chunk that is still running in the background cannot cause an unhandled rejection.
     */
    const processChunk = async (group: InputPullRequest[]): Promise<number> => {
      try {
        const results = await Promise.all(group.map(p => analyzePullRequest(p)));
        await savePullRequests(results);
        return results.filter(r => r !== null).length;
      } catch (e) {
        logger.error(`Error saving analyzed pull requests: ${e}`);
        return 0;
      }
    };

    let pullRequests = await getPullRequests(batchSize);

    do {
      logger.info(`Found ${pullRequests.length} pull requests to analyze.`);
      const inFlight: Array<Promise<number>> = [];
      for (const group of chunk(pullRequests, concurrentRequestNumber)) {
        // Chunks are started without waiting for the previous one to finish; the pause between
        // starts paces the requests (assumes `waitFor` takes milliseconds — confirm in utils).
        inFlight.push(processChunk(group));
        await waitFor(2000);
      }

      // Wait until every chunk of this batch is stored before selecting the next batch;
      // otherwise pull requests still being analyzed would be selected and analyzed again.
      const storedCounts = await Promise.all(inFlight);
      const stored = storedCounts.reduce((sum, count) => sum + count, 0);
      if (pullRequests.length > 0 && stored === 0) {
        // Nothing was stored, so the next selection would return the same pull requests again.
        logger.error('No pull request of the last batch could be analyzed and saved, stopping until the next run.');
        break;
      }

      pullRequests = await getPullRequests(batchSize);
    } while (pullRequests.length > 0);

    logger.info('PullRequestAnalysisTask done.');
  }
};

// The task object is published through CommonJS `module.exports` rather than an ES `export`;
// presumably the task runner loads task modules with `require` — confirm before changing the export style.
module.exports = task;
